library(survival)
library(MASS)
library(xtable)

# Set global parameters
mu.y11 <- 2
mu.y10 <- 0
mu.y01 <- 1
mu.y00 <- 0.5
mu.m1 <- 1
mu.m0 <- 0
mu <- c(mu.y11, mu.y10, mu.y01, mu.y00, mu.m1, mu.m0) # means

M.cut <- 1/2

sd.y11 <- 1
sd.y10 <- 1
sd.y01 <- 1
sd.y00 <- 1
sd.m1 <- 1
sd.m0 <- 1
sd <- c(sd.y11, sd.y10, sd.y01, sd.y00, sd.m1, sd.m0) # standard deviations

# Set dependence parameters
cor.y11y10 <- cor.y11y01 <- cor.y11y00 <- cor.y10y01 <- cor.y10y00 <- cor.y01y00 <- 0.5
cor.y11m1 <- cor.y11m0 <- cor.y10m1 <- cor.y10m0 <- cor.y01m1 <- cor.y01m0 <- cor.y00m1 <- cor.y00m0 <- cor.m1m0 <- 0

cormat <- matrix(c(1, cor.y11y10, cor.y11y01, cor.y11y00, cor.y11m1, cor.y11m0,
                cor.y11y10, 1, cor.y10y01, cor.y10y00, cor.y10m1, cor.y10m0,
                cor.y11y01, cor.y10y01, 1, cor.y01y00, cor.y01m1, cor.y01m0,
                cor.y11y00, cor.y10y00, cor.y01y00, 1, cor.y00m1, cor.y00m0,
                cor.y11m1, cor.y10m1, cor.y01m1, cor.y00m1, 1, cor.m1m0,
                cor.y11m0, cor.y10m0, cor.y01m0, cor.y00m0, cor.m1m0, 1),
                nrow = 6, byrow=T) # correlation matrix
covmat <- diag(sd) %*% cormat %*% diag(sd) # var-cov matrix
    
# Parametric model
alpha3 <- 0.5
beta3 <- -0.5
gamma <- 0.5
kappa <- 1.5
alpha2 <- -0.5
beta2 <- 1
sigma3 <- 1

# True delta_0
delta_0.true <- delta_0.p.true <- (pnorm(alpha2 + beta2) - pnorm(alpha2))*
    (exp(alpha3 + gamma + .5*sigma3) - exp(alpha3 + .5*sigma3))

# True delta_1
delta_1.true <- delta_1.p.true <- (pnorm(alpha2 + beta2) - pnorm(alpha2))*
    (exp(alpha3 + beta3 + gamma + kappa + .5*sigma3) - exp(alpha3 + beta3 + .5*sigma3))

thm1sim <- function(nsamp){
    nsim <- 50000
    delta_0.est <- rep(NA, nsim)
    delta_1.est <- rep(NA, nsim)
    vardelta_0.est <- rep(NA, nsim)
    vardelta_1.est <- rep(NA, nsim)
    delta_0.p.est <- rep(NA, nsim)
    delta_1.p.est <- rep(NA, nsim)
    vardelta_0.p.est <- rep(NA, nsim)
    vardelta_1.p.est <- rep(NA, nsim)

    for(i in 1:nsim){
        # Draw from the population model
        samp <- mvrnorm(nsamp, mu, covmat)
        samp <- as.data.frame(cbind(samp, c(rep(1,nsamp/2), rep(0,nsamp/2))))
        names(samp) <- c("Y11","Y10","Y01","Y00","M1","M0","T")
        
        # Exponentiate Y
        samp[,1:4] <- exp(samp[,1:4])

        # Recode M to be binary
        samp$M1 <- ifelse(samp$M1 > M.cut,1,0)
        samp$M0 <- ifelse(samp$M0 > M.cut,1,0)
        
        # Compute observed strata
        samp$M <- ifelse(samp$T==1, samp$M1, samp$M0)
        samp$Y <- ifelse(samp$T==1 & samp$M==1, samp$Y11,
                    ifelse(samp$T==1 & samp$M==0, samp$Y10,
                    ifelse(samp$T==0 & samp$M==1, samp$Y01, samp$Y00)))
                    
        # Compute parts
        n1 <- sum(samp$T)
        n0 <- sum(1-samp$T)
        n11 <- sum(samp$T * samp$M)
        n10 <- sum(samp$T * (1-samp$M))
        n01 <- sum((1-samp$T) * samp$M)
        n00 <- sum((1-samp$T) * (1-samp$M))
        lambda00 <- sum((1-samp$M) * (1-samp$T))/n0
        lambda01 <- sum(samp$M * (1-samp$T))/n0
        lambda10 <- sum((1-samp$M) * samp$T)/n1
        lambda11 <- sum(samp$M * samp$T)/n1
        mu00 <- sum(samp$Y*(1-samp$M)*(1-samp$T))/n00
        mu01 <- sum(samp$Y*samp$M*(1-samp$T))/n01
        mu10 <- sum(samp$Y*(1-samp$M)*samp$T)/n10
        mu11 <- sum(samp$Y*samp$M*samp$T)/n11
        xi00 <- mean(samp$Y[samp$T==0]*(samp$M[samp$T==0]==0)) 
        xi01 <- mean(samp$Y[samp$T==0]*(samp$M[samp$T==0]==1)) 
        xi10 <- mean(samp$Y[samp$T==1]*(samp$M[samp$T==1]==0)) 
        xi11 <- mean(samp$Y[samp$T==1]*(samp$M[samp$T==1]==1)) 
        eta1 <- mean(samp$Y[samp$T==1])
        eta0 <- mean(samp$Y[samp$T==0])
        
        # Compute delta 1 and 0 using Theorem 1 (equations 26 and 27)
        delta_0.est[i] <- sum(samp$T*samp$M)*sum(samp$Y*(1-samp$T)*samp$M) / (n1*sum((1-samp$T)*samp$M)) +
             sum(samp$T*(1-samp$M))*sum(samp$Y*(1-samp$T)*(1-samp$M)) / (n1*sum((1-samp$T)*(1-samp$M))) - 
             sum((1-samp$T)*samp$Y)/n0
        delta_1.est[i] <- sum(samp$T*samp$Y)/n1 - 
            sum((1-samp$T)*samp$M)*sum(samp$Y*samp$T*samp$M)/(n0*sum(samp$T*samp$M)) -
            sum((1-samp$T)*(1-samp$M))*sum(samp$Y*samp$T*(1-samp$M))/(n0*sum(samp$T*(1-samp$M)))
        # Compute variance using Theorem 3 - 2
        vardelta_0.est[i] <- (1/n0)*(lambda10*(((lambda10/lambda00) - 2)*var(samp$Y[samp$T==0 & samp$M==0]) + (n0*(1-lambda10)*(mu00)^2)/n1) + lambda11*(((lambda11/lambda01) - 2)*var(samp$Y[samp$T==0 & samp$M == 1]) + n0*(1-lambda11)*mu01^2/n1)) - (2/n1)*lambda10*lambda11*mu00*mu01 + var(samp$Y[samp$T == 0])/n0
        vardelta_1.est[i] <- (1/n1)*(lambda00*(((lambda00/lambda10) - 2)*var(samp$Y[samp$T==1 & samp$M==0]) + (n1*(1-lambda00)*(mu10)^2)/n0) + lambda01*(((lambda01/lambda11) - 2)*var(samp$Y[samp$T==1 & samp$M == 1]) + n1*(1-lambda01)*mu11^2/n0)) - (2/n0)*lambda00*lambda01*mu10*mu11 + var(samp$Y[samp$T == 1])/n1
        
        # Parametric Models
        mod.1 <- glm(samp$M ~ samp$T, family=binomial("probit"))
        mod.2 <- survreg(Surv(samp$Y) ~ samp$T + samp$M + samp$T:samp$M, dist="lognormal")
        
        # M term point estimate
        mod.coef.m <- mod.1$coef
        e.m.1 <- pnorm(sum(c(1,1) * mod.coef.m))
        e.m.0 <- pnorm(sum(c(1,0) * mod.coef.m))
        e.m <- e.m.1 - e.m.0
        
        # Y term point estimates
        mod.coef.y <- c(mod.2$coef, mod.2$scale)
        e.y.t0m1 <- exp(sum(c(1,0,1,0,.5) * mod.coef.y))
        e.y.t0m0 <- exp(sum(c(1,0,0,0,.5) * mod.coef.y))
        e.y.t1m1 <- exp(sum(c(1,1,1,1,.5) * mod.coef.y))
        e.y.t1m0 <- exp(sum(c(1,1,0,0,.5) * mod.coef.y))
        e.y.t0 <- e.y.t0m1 - e.y.t0m0
        e.y.t1 <- e.y.t1m1 - e.y.t1m0
        
        #M Variance Term via Delta Method
        var.cov.m <- vcov(mod.1)
        gradient.m <- c(dnorm(sum(c(1,1) * mod.coef.m)) - dnorm(sum(c(1,0) * mod.coef.m)),
            dnorm(sum(c(1,1) * mod.coef.m)))
        var.m <- t(gradient.m) %*% var.cov.m %*% gradient.m
    	
    	#Y Variance Term via Delta Method
    	var.cov.y <- cbind(rbind(mod.2$var[1:4,1:4], rep(0,4)), c(rep(0,4), exp(mod.2$scale)^2*mod.2$var[5,5]))
    	gradient.y.t1 <- c(e.y.t1, e.y.t1, e.y.t1m1, e.y.t1m1, e.y.t1/2)
    	gradient.y.t0 <- c(e.y.t0, e.y.t0m1, e.y.t0/2)
    	var.y.t1 <- t(gradient.y.t1) %*% var.cov.y %*% gradient.y.t1
    	var.y.t0 <- t(gradient.y.t0) %*% var.cov.y[c(1,3,5),c(1,3,5)] %*% gradient.y.t0
    	
    	delta_0.p.est[i] <- e.m*e.y.t0
    	delta_1.p.est[i] <- e.m*e.y.t1
    	vardelta_0.p.est[i] <- e.m^2*var.y.t0 + e.y.t0^2*var.m + var.m*var.y.t0
    	vardelta_1.p.est[i] <- e.m^2*var.y.t1 + e.y.t1^2*var.m + var.m*var.y.t1
        }
        
    # Remove NA's
    Draws <- as.data.frame(na.omit(cbind(delta_0.est, delta_1.est, vardelta_0.est, vardelta_1.est)))
    Draws <- subset(Draws, Draws$vardelta_0.est >= 0 & Draws$vardelta_1.est >= 0)
    delta_0.est <- Draws[,1]
    delta_1.est <- Draws[,2]
    vardelta_0.est <- Draws[,3]
    vardelta_1.est <- Draws[,4]
    
    # Point estimates
    delta_0.thm <- mean(delta_0.est)
    delta_1.thm <- mean(delta_1.est)
    vardelta_0.thm <- mean(vardelta_0.est)
    vardelta_1.thm <- mean(vardelta_1.est)
    vardelta_0.true <- var(delta_0.est)
    vardelta_1.true <- var(delta_1.est)
    
    # Point estimates for Parametric model
    delta_0.p.thm <- mean(delta_0.p.est)
    delta_1.p.thm <- mean(delta_1.p.est)
    vardelta_0.p.thm <- mean(vardelta_0.p.est)
    vardelta_1.p.thm <- mean(vardelta_1.p.est)
    vardelta_0.p.true <- var(delta_0.p.est)
    vardelta_1.p.true <- var(delta_1.p.est)

    # Compute coverage probabilities for the 90% and 95% CIs
    CI90.up.d0 <- delta_0.est + qnorm(0.95)*sqrt(vardelta_0.est)
    CI90.lo.d0 <- delta_0.est - qnorm(0.95)*sqrt(vardelta_0.est)
    CI95.up.d0 <- delta_0.est + qnorm(0.975)*sqrt(vardelta_0.est)
    CI95.lo.d0 <- delta_0.est - qnorm(0.975)*sqrt(vardelta_0.est)
    CI90.up.d1 <- delta_1.est + qnorm(0.95)*sqrt(vardelta_1.est)
    CI90.lo.d1 <- delta_1.est - qnorm(0.95)*sqrt(vardelta_1.est)
    CI95.up.d1 <- delta_1.est + qnorm(0.975)*sqrt(vardelta_1.est)
    CI95.lo.d1 <- delta_1.est - qnorm(0.975)*sqrt(vardelta_1.est)
    
    delta_0.covered.90 <- ifelse(CI90.lo.d0 < delta_0.true & delta_0.true < CI90.up.d0, 1, 0)
    delta_0.covered.95 <- ifelse(CI95.lo.d0 < delta_0.true & delta_0.true < CI95.up.d0, 1, 0)
    delta_1.covered.90 <- ifelse(CI90.lo.d1 < delta_1.true & delta_1.true < CI90.up.d1, 1, 0)
    delta_1.covered.95 <- ifelse(CI95.lo.d1 < delta_1.true & delta_1.true < CI95.up.d1, 1, 0)

    delta_0.covprob.90 <- sum(delta_0.covered.90)/nsim
    delta_0.covprob.95 <- sum(delta_0.covered.95)/nsim
    delta_1.covprob.90 <- sum(delta_1.covered.90)/nsim
    delta_1.covprob.95 <- sum(delta_1.covered.95)/nsim
    
    # Compute coverage probabilities for the 90% and 95% CIs for Parametric Model
    CI90.up.p.d0 <- delta_0.p.est + qnorm(0.95)*sqrt(vardelta_0.p.est)
    CI90.lo.p.d0 <- delta_0.p.est - qnorm(0.95)*sqrt(vardelta_0.p.est)
    CI95.up.p.d0 <- delta_0.p.est + qnorm(0.975)*sqrt(vardelta_0.p.est)
    CI95.lo.p.d0 <- delta_0.p.est - qnorm(0.975)*sqrt(vardelta_0.p.est)
    CI90.up.p.d1 <- delta_1.p.est + qnorm(0.95)*sqrt(vardelta_1.p.est)
    CI90.lo.p.d1 <- delta_1.p.est - qnorm(0.95)*sqrt(vardelta_1.p.est)
    CI95.up.p.d1 <- delta_1.p.est + qnorm(0.975)*sqrt(vardelta_1.p.est)
    CI95.lo.p.d1 <- delta_1.p.est - qnorm(0.975)*sqrt(vardelta_1.p.est)
    
    delta_0.p.covered.90 <- ifelse(CI90.lo.p.d0 < delta_0.p.true & delta_0.p.true < CI90.up.p.d0, 1, 0)
    delta_0.p.covered.95 <- ifelse(CI95.lo.p.d0 < delta_0.p.true & delta_0.p.true < CI95.up.p.d0, 1, 0)
    delta_1.p.covered.90 <- ifelse(CI90.lo.p.d1 < delta_1.p.true & delta_1.p.true < CI90.up.p.d1, 1, 0)
    delta_1.p.covered.95 <- ifelse(CI95.lo.p.d1 < delta_1.p.true & delta_1.p.true < CI95.up.p.d1, 1, 0)

    delta_0.p.covprob.90 <- sum(delta_0.p.covered.90)/nsim
    delta_0.p.covprob.95 <- sum(delta_0.p.covered.95)/nsim
    delta_1.p.covprob.90 <- sum(delta_1.p.covered.90)/nsim
    delta_1.p.covprob.95 <- sum(delta_1.p.covered.95)/nsim

    # Bias and RMSE
    bias.delta_0 <- delta_0.thm - delta_0.true
    bias.delta_1 <- delta_1.thm - delta_1.true

    RMSE.delta_0 <- sqrt(mean((delta_0.est - delta_0.true)^2))
    RMSE.delta_1 <- sqrt(mean((delta_1.est - delta_1.true)^2))

    # Bias and RMSE for parametric model
    bias.delta_0.p <- delta_0.p.thm - delta_0.p.true
    bias.delta_1.p <- delta_1.p.thm - delta_1.p.true

    RMSE.delta_0.p <- sqrt(mean((delta_0.p.est - delta_0.p.true)^2))
    RMSE.delta_1.p <- sqrt(mean((delta_1.p.est - delta_1.p.true)^2))

    # Summarize results
    res.0 <- c(delta_0.true, delta_0.thm, vardelta_0.true, vardelta_0.thm, nsamp, bias.delta_0, RMSE.delta_0, delta_0.covprob.90, delta_0.covprob.95)
    res.1 <- c(delta_1.true, delta_1.thm, vardelta_1.true, vardelta_1.thm, nsamp, bias.delta_1, RMSE.delta_1, delta_1.covprob.90, delta_1.covprob.95)
    res.0.p <- c(delta_0.p.true, delta_0.p.thm, vardelta_0.p.true, vardelta_0.p.thm, nsamp, bias.delta_0.p, RMSE.delta_0.p, delta_0.p.covprob.90, delta_0.p.covprob.95)
    res.1.p <- c(delta_1.p.true, delta_1.p.thm, vardelta_1.p.true, vardelta_1.p.thm, nsamp, bias.delta_1.p, RMSE.delta_1.p, delta_1.p.covprob.90, delta_1.p.covprob.95)
    res <- rbind(res.0, res.1, res.0.p, res.1.p)
    colnames(res) <- c("delta.true", "delta.est", "vardelta.true", "vardelta.est", "n", "bias", "RMSE", "cov.prob.90", "cov.prob.95")
    res
}

set.seed(1)

nsamps <- c(50,100,500)
res <- as.list(rep(NA, length(nsamps)))

for(i in seq(along=nsamps)){
res[[i]] <- thm1sim(nsamps[i])
cat("Sample Size: ", res[[i]][1,5], "\n")
cat("True Delta: ", res[[i]][,1], "\n")
cat("Estimated Delta: ", res[[i]][,2], "\n")
cat("Bias: ", res[[i]][,6], "\n")
cat("RMSE: ", res[[i]][,7], "\n")
cat("True Variance: ", res[[i]][,3], "\n")
cat("Estimated Variance: ", res[[i]][,4], "\n")
cat("90% Coverage Probability: ", res[[i]][,8], "\n")
cat("95% Coverage Probability: ", res[[i]][,9], "\n")
cat("\n")
}

result.tab <- matrix(NA, ncol=7, nrow=2*length(nsamps))
for(i in 1:length(res)){
    result.tab[i,1] <- res[[i]][1,5]
    result.tab[i,2:4] <- res[[i]][1,c(6,7,9)]
    result.tab[i,5:7] <- res[[i]][3,c(6,7,9)]
    result.tab[i+length(res),1] <- res[[i]][2,5]
    result.tab[i+length(res),2:4] <- res[[i]][2,c(6,7,9)]
    result.tab[i+length(res),5:7] <- res[[i]][4,c(6,7,9)]
}
rowhead <- c("$\\hat\\delta(0)$",rep("", length(nsamps)-1),"$\\hat\\delta(1)$",rep("", length(nsamps)-1))
result.tab <- data.frame(rowhead,result.tab)
colnames(result.tab) <- c("Estimator", "Sample Size","Bias","RMSE", "95\\% CI Coverage","Bias", "RMSE", "95\\% CI Coverage")

res.xtab <- xtable(result.tab, align="lllcccccc", digits=3, display=c("s","s","d","f","f","f","f","f","f"))
print(res.xtab, sanitize.text = function(x) x, file="thm1sim.tex", hline.after=c(-1,0,length(nsamps),nrow(res.xtab)), include.rownames=F)

